#### "How do past repression and indoctrination affect redistributive preferences?" ####
# authors: "Pelke, Lars"
# date: 2019-10-23
# written under "R version 3.6.0 (2019-03-11)"

#### Preliminaries ####

# Evaluate the version string of the running R session (auto-printed when the
# script is run interactively) so it can be compared with the version noted
# in the header.
R.version$version.string

# clear workspace
# NOTE(review): this removes every object that ls() lists in the current
# environment (dot-prefixed objects and attached packages are untouched).
# Sourcing the script from a live session therefore discards the user's
# objects; running it in a fresh R session would make this call unnecessary.
rm(list=ls())

# set working directory

# loading packages

library(countrycode)
library(tidyverse)
library(viridis)
library(scales)

#### Import Data ####

#Load World value survey data

# Read the serialized WVS time-series file and show its first rows.
wvsdata <- readRDS("data/wvs_time_series/WVS_TimeSeries_R_v1_2.rds")
head(wvsdata)

#### Reduce Datasets ####

# Keep only the survey items that are recoded further down; the columns stay
# in the order listed here.
wvsdata <- select(
  wvsdata,
  X001, X002, X003, X011, X025, X025A2, X028, X047, X045,
  E035, E037, S020, S003, S025, S002
)

summary(wvsdata)

#### Prepare WVS Variables for Empirical Analysis ####

# Derive sex, age and education from the raw items in one pass.
#   sex:       1 when X001 == 1, 0 when X001 == 2, NA for any other code
#   age:       X003 with negative codes set to NA
#   education: X025 with negative codes set to NA
wvsdata <- wvsdata %>%
  mutate(
    sex = ifelse(X001 == 1, 1, ifelse(X001 == 2, 0, NA)),
    age = ifelse(X003 < 0, NA, X003),
    education = ifelse(X025 < 0, NA, X025)
  )

# Diagnostics: distribution and number of missing values per new variable.
table(wvsdata$sex)
sum(is.na(wvsdata$sex))

summary(wvsdata$age)
sum(is.na(wvsdata$age))

summary(wvsdata$education)
sum(is.na(wvsdata$education))

# Rows from wave 7 (S002 == 7) take their education value from X025A2 and use
# their own cut-points for the three-level variable, so they are processed
# separately and appended again below. Rows whose S002 is NA match neither
# filter and are dropped.
wvsdata_wave7 <- wvsdata %>%
  filter(S002 == 7) %>%
  mutate(
    # negative codes become NA
    education = ifelse(X025A2 < 0, NA, X025A2),
    # <= 1 -> 1, 2..5 -> 2, everything else -> 3 (NA stays NA)
    education_3 = ifelse(
      education <= 1, 1,
      ifelse(education >= 2 & education <= 5, 2, 3)
    )
  )

# All remaining waves keep the education value derived from X025.
wvsdata <- wvsdata %>%
  filter(S002 != 7) %>%
  mutate(
    # <= 3 -> 1, 4..6 -> 2, everything else -> 3 (NA stays NA)
    education_3 = ifelse(
      education <= 3, 1,
      ifelse(education >= 4 & education <= 6, 2, 3)
    )
  )

# Diagnostics for both subsets before they are recombined.
summary(wvsdata_wave7$education)
sum(is.na(wvsdata_wave7$education))

summary(wvsdata$education_3)

summary(wvsdata_wave7$education_3)

# Append the wave-7 rows after the other waves and drop the temporary object.
wvsdata <- bind_rows(wvsdata, wvsdata_wave7)

rm(wvsdata_wave7)

summary(wvsdata$education_3)


# Birth year and income. Negative codes are turned into NA throughout.
#   birth_year:       X002
#   income_deciles:   X047
#   income_quintiles: deciles 1-2 -> 1, 3-4 -> 2, 5-6 -> 3, 7-8 -> 4, 9-10 -> 5,
#                     i.e. ceiling(decile / 2); any other non-negative value
#                     of X047 is passed through unchanged.
wvsdata <- wvsdata %>%
  mutate(
    birth_year = ifelse(X002 < 0, NA, X002),
    income_deciles = ifelse(X047 < 0, NA, X047),
    income_quintiles = ifelse(
      income_deciles %in% 1:10,
      ceiling(income_deciles / 2),
      X047
    ),
    income_quintiles = ifelse(X047 < 0, NA, income_quintiles)
  )

# Diagnostics: distribution and number of missing values per new variable.
summary(wvsdata$birth_year)
sum(is.na(wvsdata$birth_year))

summary(wvsdata$income_deciles)
sum(is.na(wvsdata$income_deciles))

summary(wvsdata$income_quintiles)
sum(is.na(wvsdata$income_quintiles))

# Social class, children and unemployment. Negative codes become NA.
#   social_class: X045 as is
#   children:     1 when X011 > 0, 0 when X011 == 0
#   unemployed:   1 when X028 == 7; 0 when X028 is in [1, 7) or equals 8;
#                 any other non-negative value of X028 is passed through
wvsdata <- wvsdata %>%
  mutate(
    social_class = ifelse(X045 < 0, NA, X045),
    children = ifelse(X011 < 0, NA, ifelse(X011 > 0, 1, X011)),
    unemployed = ifelse(
      X028 < 0, NA,
      ifelse(
        X028 == 7, 1,
        ifelse((X028 >= 1 & X028 < 7) | X028 == 8, 0, X028)
      )
    )
  )

# Diagnostics: distribution and number of missing values per new variable.
summary(wvsdata$social_class)
sum(is.na(wvsdata$social_class))

summary(wvsdata$children)
sum(is.na(wvsdata$children))

summary(wvsdata$unemployed)
sum(is.na(wvsdata$unemployed))

#### Mutating Dependent Variables ####

# Copy the two outcome items (E035, E037) and set negative codes to NA.
wvsdata <- wvsdata %>%
  mutate(
    income_equality = ifelse(E035 < 0, NA, E035),
    government_resp = ifelse(E037 < 0, NA, E037)
  )

summary(wvsdata$income_equality)
sum(is.na(wvsdata$income_equality))

summary(wvsdata$government_resp)
sum(is.na(wvsdata$government_resp))

## Reverse income_equality so that, per the original author's note, the scale
## runs from 1 = least support to 10 = most support. Codes 1..10 are mirrored
## (1 <-> 10, 2 <-> 9, ...); any other value, including NA, is left unchanged.
## NOTE(review): only income_equality is reversed; government_resp keeps its
## original direction -- confirm that this is intended.
wvsdata <- wvsdata %>%
  mutate(
    income_equality = if_else(
      income_equality %in% 1:10,
      11 - income_equality,
      income_equality
    )
  )


table(wvsdata$government_resp)
table(wvsdata$income_equality)


#### Country Codes to World Values Survey Country Items ####

# Translate the WVS country code (S003) into the ISO numeric country code.
# warn = TRUE makes countrycode() report codes it could not match.
wvsdata$iso3n <- countrycode(wvsdata$S003, "wvs", "iso3n", warn = TRUE)

# Inspect the rows with country codes 446, 499 and 688, for which the original
# author noted that it is not clear which country is meant (2357 observations
# according to that note; the count has not been re-verified).
# `%in%` tests set membership. The previous `S003 == c(446, 499, 688)`
# recycled the three codes along the rows, so a row only matched when its
# code happened to line up with its position, and most rows were missed.
wvsdata %>% dplyr::filter(S003 %in% c(446, 499, 688))

# Survey year and wave under readable names.
wvsdata$year <- wvsdata$S020

wvsdata$wave <- wvsdata$S002

#### Age and Cohorts WVS ####

## cohort of respondents ##

summary(wvsdata$birth_year)

# Build the cohort variables from the birth year:
#   cohort          : the birth year itself
#   cohort_5        : first year of the 5-year bin [start, start + 5) that
#                     contains the birth year; bins run from 1885 to 2019,
#                     birth years outside that range become NA
#   cohortmatch5_15 : bin start plus 15 years of socialization
#   cohortmatch5_20 : bin start plus 20 years of socialization
wvsdata <- wvsdata %>%
  mutate(
    cohort = birth_year,
    cohort_5 = cut(
      cohort,
      breaks = seq(1885, 2020, by = 5),
      labels = seq(1885, 2015, by = 5),
      right = FALSE
    ),
    # cut() returns a factor; go through character to recover the label value
    cohort_5 = as.numeric(as.character(cohort_5)),
    cohortmatch5_15 = cohort_5 + 15,
    cohortmatch5_20 = cohort_5 + 20
  )

table(wvsdata$cohort_5)
table(wvsdata$cohortmatch5_15)
table(wvsdata$cohortmatch5_20)

#### Generate Dataset ID ####

# Tag every row with the identifier of the source survey.
wvsdata <- mutate(wvsdata, data = "WVS")
table(wvsdata$data)

#################################################################################################################
#################################################################################################################

#### RECODING OF VARIABLES FOR HARMONIZATION OF DATASETS ####

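# Sex: no recoding necessary (0: men, 1: women)
# NOTE(review): the recode earlier in this script sets sex = 1 when X001 == 1
# and sex = 0 when X001 == 2; confirm against the WVS codebook that this
# matches the 0 = men, 1 = women coding stated here.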

# Social Class: from 1 (low) to 5 (high/upper)

# Distribution before the reversal.
table(wvsdata$social_class)
sum(is.na(wvsdata$social_class))

# Mirror the codes 1..5 (1 <-> 5, 2 <-> 4, 3 stays 3); any other value,
# including NA, is left unchanged.
wvsdata <- wvsdata %>%
  mutate(
    social_class = if_else(
      social_class %in% 1:5,
      6 - social_class,
      social_class
    )
  )

# Distribution after the reversal.
table(wvsdata$social_class)
sum(is.na(wvsdata$social_class))

# Education category: into 3-three scale: done above at education_3

# unemployed: already done (binary scale)

# income: already done 

# children: already done (binary scale)

#### Rescale Dependent Variables ####

# income_equality

# Stretch the variable onto 0..100. No `from` range is passed, so rescale()
# works from the range of values actually observed in the column.
table(wvsdata$income_equality)

wvsdata <- wvsdata %>%
  mutate(income_equality = rescale(income_equality, to = c(0, 100)))
table(wvsdata$income_equality)

# government_resp: same treatment

table(wvsdata$government_resp)
wvsdata <- wvsdata %>%
  mutate(government_resp = rescale(government_resp, to = c(0, 100)))
table(wvsdata$government_resp)


# Mean of a vector with missing values ignored.
#
# Arguments:
#   v  numeric (or logical) vector.
# Returns:
#   NA when `v` is empty or contains only NA (instead of the NaN that
#   mean(v, na.rm = TRUE) would produce); otherwise the mean of the
#   non-missing elements.
mean.new <- function(v) {
  if (all(is.na(v))) {
    return(NA)
  }
  # TRUE is spelled out: the shorthand `T` is an ordinary variable that can be
  # reassigned, which would silently switch off the NA removal.
  mean(v, na.rm = TRUE)
}


# Mean of education_3 for every wave/country combination, computed with
# mean.new() so that groups without any valid value yield NA.
overview <- summarize(
  group_by(wvsdata, wave, iso3n),
  mean = mean.new(education_3)
)
  
#### SAVE DATASET ####

# Keep the prepared variables only, in the order listed, and write the result
# to disk as an RDS file.
wvsdata <- dplyr::select(
  wvsdata,
  data, iso3n, year, wave,
  sex, age, education, education_3, birth_year,
  income_deciles, income_quintiles, social_class, children, unemployed,
  income_equality, government_resp,
  cohort_5, cohortmatch5_15, cohortmatch5_20
)

saveRDS(wvsdata, file = "data/wvsdata_prepared.rds")





